home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Die Speccy' 97
/
Die Speccy' 97.iso
/
amiga_system
/
the_aminet
/
util
/
gnu
/
ispell_src.lha
/
ispell-3.1.18src
/
ijoin.c
< prev
next >
Wrap
C/C++ Source or Header
|
1994-11-02
|
14KB
|
461 lines
#ifndef lint
static char Rcs_Id[] =
"$Id: ijoin.c,v 1.6 1994/10/18 04:03:21 geoff Exp $";
#endif
/*
* Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All modifications to the source code must be clearly marked as
* such. Binary redistributions based on modified source code
* must be clearly marked as modified versions in the documentation
* and/or other materials provided with the distribution.
* 4. All advertising materials mentioning features or use of this software
* must display the following acknowledgment:
* This product includes software developed by Geoff Kuenning and
* other unpaid contributors.
* 5. The name of Geoff Kuenning may not be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* "Join" command for ispell.
*
* This command is a complete reimplementation of the UNIX "join"
* command, except that fields cannot be separated by a newline, it
* can handle lines of unlimited length, and the preceding sort can
* treat characters as either signed or unsigned.
*
* Usage:
*
* ijoin [options] file1 file2
*
* See the UNIX "join" manual page for option descriptions. Only
* nonstandard options are described here.
*
* Either file1 or file2 may be "-", in which case the standard input
* is used for that file.
*
* Normally, ijoin uses "strcmp" to compare fields. This is the
* correct thing to do on most systems if you are using the
* system-provided "sort" command to sort the input files before
* feeding them to ijoin. In some cases, however, the sort command
* you use will disagree with strcmp about how to handle characters
* with the high bit set. If this is the case, you can specify the
* "-s" (signed comparisons) or "-u" (unsigned comparisons) option to
* force ijoin to conform to the method used by the sort program.
* This is only necessary if one of the input files contains 8-bit
* characters in the field that is being joined on.
*
* On some older machines with non-ANSI compilers, the "-s" option
* will be ineffective because characters default to unsigned.
* However, this option should be unnecessary on those machines, so no
* harm will be done.
*/
/*
* $Log: ijoin.c,v $
* Revision 1.6 1994/10/18 04:03:21 geoff
* Fix a couple of bugs, one where the last field on a line could be
* output incorrectly, and one where fields from the wrong file could be
* output.
*
* Revision 1.5 1994/01/25 07:11:36 geoff
* Get rid of all old RCS log lines in preparation for the 3.1 release.
*
*/
#include <stdio.h>
#include "config.h"
#include "ispell.h"
#include "proto.h"
#include "fields.h"
#ifdef __STDC__
#define SIGNED signed
#else /* __STDC */
#define SIGNED
#endif /* __STDC */
int main P ((int argc, char * argv[])); /* Join files */
static void usage P ((void)); /* Issue a usage message */
static void dojoin P ((void)); /* Actually perform the join */
static void full_output P ((field_t * line1, field_t * line2));
/* Output everything from both lines */
static void selected_output P ((field_t * line1, field_t * line2));
/* Output selected fields */
static int strscmp P ((SIGNED char * a, SIGNED char * b));
/* Signed version of strcmp */
static int strucmp P ((unsigned char * a, unsigned char * b));
/* Unsigned version of strcmp */
typedef struct
{
int file; /* Number of file to output from */
int field; /* Number of field to output */
}
outlist_t; /* Output description list */
static int (*compare) () = strcmp; /* Comparison function */
static char * emptyfield = ""; /* Use this to replace empty fields */
static FILE * file1; /* First file to join */
static FILE * file2; /* Second file to join */
static int join1field = 0; /* Field to join file 1 on */
static int join2field = 0; /* Field to join file 2 on */
static int maxf[2] = {0, 0}; /* Max field to parse in each file */
static outlist_t * outlist = NULL; /* List of fields to write */
static int outlistsize; /* Number of elements in outlist */
static int runs = FLD_RUNS; /* Set if runs of tabchar same as 1 */
static char * tabchar = " \t"; /* Field separator character(s) */
static int unpairable1 = 0; /* NZ if -a1 */
static int unpairable2 = 0; /* NZ if -a2 */
extern int strcmp ();
int main (argc, argv) /* Join files */
int argc; /* Argument count */
char * argv[]; /* Argument vector */
{
while (argc > 3 && argv[1][0] == '-')
{
argc--;
argv++;
switch (argv[0][1])
{
case 'a': /* produce output for unpairables */
if (argv[0][2] == '1')
unpairable1 = 1;
else if (argv[0][2] == '2')
unpairable2 = 1;
else if (argv[0][2] == '\0')
unpairable1 = unpairable2 = 1;
else
usage ();
break;
case 'e': /* Replace empty fields with this */
argc--;
argv++;
emptyfield = *argv;
break;
case 'j': /* Specify field to join on */
if (argv[0][2] == '1')
join1field = atoi (argv[1]) - 1;
else if (argv[0][2] == '2')
join2field = atoi (argv[1]) - 1;
else if (argv[0][2] == '\0')
join1field = join2field = atoi (argv[1]) - 1;
else
usage ();
argc--;
argv++;
break;
case 'o': /* Specify output list */
/*
* We will assume that all remaining switch arguments
* are used to describe the output list. This will
* occasionally result in malloc'ing a few too many
* elements, but no real harm will be done.
*/
outlist =
(outlist_t *) malloc ((argc - 3) * sizeof (outlist_t));
if (outlist == NULL)
{
(void) fprintf (stderr, "ijoin: out of memory!\n");
return 1;
}
for (outlistsize = 0, argc--, argv++;
argc > 2 && (argv[0][0] == '1' || argv[0][0] == '2')
&& argv[0][1] == '.';
argc--, argv++, outlistsize++)
{
outlist[outlistsize].file = argv[0][0] - '0';
outlist[outlistsize].field = atoi (&argv[0][2]) - 1;
if (maxf[outlist[outlistsize].file - 1]
<= outlist[outlistsize].field)
maxf[outlist[outlistsize].file - 1] =
outlist[outlistsize].field + 1;
}
argc++; /* Un-do arg that stopped us */
argv--;
break;
case 't':
tabchar = &argv[0][2];
runs &= ~FLD_RUNS;
break;
case 's':
compare = strscmp;
break;
case 'u':
compare = strucmp;
break;
default:
usage ();
break;
}
}
if (argc != 3)
usage ();
if (strcmp (argv[1], "-") == 0)
file1 = stdin;
else
{
file1 = fopen (argv[1], "r");
if (file1 == NULL)
perror (argv[1]);
}
file2 = fopen (argv[2], "r");
if (file2 == NULL)
perror (argv[2]);
if (file1 == NULL || file2 == NULL)
return 1;
dojoin ();
return 0;
}
static void usage () /* Issue a usage message */
{
(void) fprintf (stderr,
"Usage: ijoin [-an] [-e s] [-jn m] [-o n.m ...] [-tc] file1 file2\n");
exit (1);
}
static void dojoin () /* Actually perform the join */
{
int comparison; /* Result of comparing the lines */
long file2pos; /* Position file 2 started at */
register field_t * line1; /* Line from file 1 */
register field_t * line2; /* Line from file 2 */
int pairable; /* NZ if lines can be paired */
int skip2; /* No. of "unpairable" 2's to skip */
runs |= FLD_NOSHRINK; /* Don't do excessive reallocations */
field_line_inc = BUFSIZ; /* Allocate line bfr in huge chunks */
line1 = fieldread (file1, tabchar, runs, maxf[0]);
file2pos = ftell (file2);
skip2 = 0;
if (file2pos == -1)
{
(void) fprintf (stderr, "ijoin: Can't seek file ");
perror ("2");
exit (1);
}
line2 = fieldread (file2, tabchar, runs, maxf[1]);
while (line1 != NULL || line2 != NULL)
{
/*
* Do a little work to reduce the number of calls to realloc, at
* the expense of slightly-increased memory usage.
*/
if (line1 != NULL && line1->nfields >= field_field_inc)
field_field_inc = line1->nfields + 1;
if (line2 != NULL && line2->nfields >= field_field_inc)
field_field_inc = line2->nfields + 1;
/*
* Determine if the lines can be paired.
*/
pairable = 1;
if (line1 == NULL)
{
pairable = 0;
comparison = 1; /* This causes file 2 to advance */
}
else if (join1field >= line1->nfields)
{
pairable = 0;
comparison = -1; /* This causes file 1 to advance */
}
if (line2 == NULL)
{
pairable = 0;
comparison = -1; /* This causes file 1 to advance */
}
else if (join2field >= line2->nfields)
{
pairable = 0;
comparison = 1; /* This causes file 2 to advance */
}
if (pairable)
{
comparison = (*compare) (line1->fields[join1field],
line2->fields[join2field]);
pairable = (comparison == 0);
}
if (pairable)
{
/*
* The two lines can be paired. Produce output.
*/
if (outlist == NULL)
full_output (line1, line2);
else
selected_output (line1, line2);
}
/*
* Advance through the files
*/
if (comparison < 0)
{
if (unpairable1)
{
if (outlist == NULL)
(void) fieldwrite (stdout, line1, tabchar[0]);
else
selected_output (line1, (field_t *) NULL);
}
fieldfree (line1);
line1 = fieldread (file1, tabchar, runs, maxf[0]);
}
else if (comparison > 0)
{
if (skip2 > 0)
skip2--;
else if (unpairable2)
{
if (outlist == NULL)
(void) fieldwrite (stdout, line2, tabchar[0]);
else
selected_output ((field_t *) NULL, line2);
}
fieldfree (line2);
file2pos = ftell (file2);
line2 = fieldread (file2, tabchar, runs, maxf[1]);
}
else
{
/*
* Here's the tricky part. We have to advance file 2
* until comparisons fail, and then back it up and advance
* file 1.
*/
skip2++;
fieldfree (line2);
line2 = fieldread (file2, tabchar, runs, maxf[1]);
if (line2 == NULL
|| join2field >= line2->nfields
|| (*compare) (line1->fields[join1field],
line2->fields[join2field])
!= 0)
{
(void) fseek (file2, file2pos, 0);
fieldfree (line2);
line2 = fieldread (file2, tabchar, runs, maxf[1]);
fieldfree (line1);
line1 = fieldread (file1, tabchar, runs, maxf[0]);
if (line1 != NULL && line2 != NULL
&& join1field < line1->nfields
&& join2field < line2->nfields
&& (*compare) (line1->fields[join1field],
line2->fields[join2field])
== 0)
skip2 = 0;
}
}
}
}
static void full_output (line1, line2) /* Output everything from both lines */
register field_t * line1; /* Line from file 1 */
register field_t * line2; /* Line from file 2 */
{
register int fieldno; /* Number of field being handled */
(void) fputs (line1->fields[join1field], stdout);
for (fieldno = 0; fieldno < line1->nfields; fieldno++)
{
if (fieldno == join1field)
continue;
(void) putchar (tabchar[0]);
if (line1->fields[fieldno][0] == '\0')
(void) fputs (emptyfield, stdout);
else
(void) fputs (line1->fields[fieldno], stdout);
}
for (fieldno = 0; fieldno < line2->nfields; fieldno++)
{
if (fieldno == join2field)
continue;
(void) putchar (tabchar[0]);
if (line2->fields[fieldno][0] == '\0')
(void) fputs (emptyfield, stdout);
else
(void) fputs (line2->fields[fieldno], stdout);
}
(void) putchar ('\n');
}
static void selected_output (line1, line2) /* Output selected fields */
field_t * line1; /* Line from file 1 */
field_t * line2; /* Line from file 2 */
{
register field_t * cline; /* Current line being handled */
register int listno; /* Number of output list entry */
for (listno = 0; listno < outlistsize; listno++)
{
if (listno != 0)
(void) putchar (tabchar[0]);
if (outlist[listno].file == 1)
cline = line1;
else
cline = line2;
if (cline == NULL
|| outlist[listno].field >= cline->nfields
|| cline->fields[outlist[listno].field][0] == '\0')
(void) fputs (emptyfield, stdout);
else
(void) fputs (cline->fields[outlist[listno].field], stdout);
}
(void) putchar ('\n');
}
static int strscmp (a, b) /* Compare signed strings */
register SIGNED char * a; /* First string to compare */
register SIGNED char * b; /* Second string to compare */
{
while (*a != '\0')
{
if (*a++ != *b++)
return *--a - *--b;
}
return *a - *b;
}
static int strucmp (a, b) /* Compare unsigned strings */
register unsigned char * a; /* First string to compare */
register unsigned char * b; /* Second string to compare */
{
while (*a != '\0')
{
if (*a++ != *b++)
return *--a - *--b;
}
return *a - *b;
}